#!/usr/bin/python
# -*- coding: UTF-8 -*- 

from urllib.parse import urljoin

import bs4
import requests
import xlwt

# Fetch the ZOL phone ranking page (HTML) and keep a local snapshot of it in
# web-get.html. Any failure is reported on stdout and ends the script.
url = "https://top.zol.com.cn/compositor/57/cell_phone.html"
kv = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_3) AppleWebKit/601.4.4 (KHTML, like Gecko) Version/9.0.3 lang="cn"'}

try:
  # A timeout keeps the script from hanging forever on a stalled connection.
  response = requests.get(url, headers=kv, timeout=30)
  response.raise_for_status()
except requests.RequestException as err:
  # err.response is None when no HTTP response was received at all (DNS
  # failure, refused connection, timeout). Compare against None explicitly:
  # a Response carrying a 4xx/5xx status evaluates as falsy.
  failed = err.response
  detail = failed.status_code if failed is not None else err
  print("web get error: " + str(detail))
  # Non-zero status so a calling shell/script can detect the failure.
  raise SystemExit(1)

# Let requests guess the charset from the body rather than trusting headers.
response.encoding = response.apparent_encoding

try:
  # The context manager closes the file even if the write fails midway.
  with open("web-get.html", "w", encoding='utf-8') as file:
    file.write(response.text)
except OSError as err:
  print("web-get.html write error: " + str(err))
  raise SystemExit(1)

# Parse the downloaded HTML and collect one element per ranked device.
soup = bs4.BeautifulSoup(response.text,'lxml')
main = soup.find('div',{'class':'main'})
if main is None:
  # find() returns None when nothing matches (layout change, error page,
  # anti-bot page). Stop with a clear message instead of an AttributeError
  # on the next line.
  raise SystemExit('parse error: no <div class="main"> found in the page')

# NOTE(review): a class value containing a space is matched against the full
# class attribute string, so this depends on the page emitting the two
# classes in exactly this order - confirm against the live markup.
dev_list = main.find_all('div',{'class':'rank-list__item clearfix'})
line = len(dev_list)
print("line is:", line)

# Build the output workbook: one sheet whose first row holds the column titles.
workbook = xlwt.Workbook(encoding = 'utf-8')
worksheet = workbook.add_sheet('Worksheet')

# Titles in column order: rank, model, price, popularity, rating, image, link.
for column, title in enumerate(('排名', '型号', '价格', '热度', '评分', '图片', '链接')):
  worksheet.write(0, column, title)

print("worksheet start")

def _css_width(style):
  """Return the value of the ``width`` declaration in an inline CSS string.

  ``str.strip('width:')`` removes any of the characters w/i/d/t/h/: from both
  ends rather than the literal prefix, so the declaration is parsed instead.

  Args:
    style: content of a ``style`` attribute, e.g. ``"width:80%"``.

  Returns:
    The width value with surrounding whitespace removed, e.g. ``"80%"``.

  Raises:
    ValueError: if ``style`` contains no ``width`` declaration.
  """
  for declaration in style.split(';'):
    prop, sep, value = declaration.partition(':')
    if sep and prop.strip().lower() == 'width':
      return value.strip()
  raise ValueError("no width declaration in " + repr(style))


def _div(item, css_class):
  """Return the first <div> under ``item`` with class ``css_class``, or None."""
  return item.find('div', {'class': css_class})


# One entry per spreadsheet column, in column order:
#   (label used in the log output, extractor taking a device element,
#    optional converter applied to the value before it is written).
# An extractor raises when the expected markup is missing; the loop below
# reports that field as absent and moves on.
_COLUMNS = (
  # rank
  ('rank__number', lambda item: _div(item, 'rank__number').contents[0], None),
  # model name
  ('rank__name', lambda item: _div(item, 'rank__name').find('a').contents[0], None),
  # price, without the leading currency sign
  ('rank__price', lambda item: _div(item, 'rank__price').contents[0].strip('￥'), None),
  # popularity, encoded by the page as the width of a bar
  ('rate__bar', lambda item: _css_width(_div(item, 'rate-bar').find('span')['style']), None),
  # rating, encoded by the page as the width of the star strip
  ('star', lambda item: _css_width(_div(item, 'star').find('em')['style']), None),
  # image URL
  ('rank__pic', lambda item: _div(item, 'rank__pic').find('img')['src'], None),
  # detail-page link; presumably protocol-relative ("//host/path") on this
  # page. urljoin resolves it against the page URL and, unlike a blind
  # "https:" prefix, leaves an already-absolute URL intact.
  ('href', lambda item: _div(item, 'rank__name').find('a')['href'],
   lambda href: urljoin(url, href)),
)

# Row 0 holds the titles, so device i goes to sheet row i+1.
for i, item in enumerate(dev_list):
  print("at", i)
  for col, (label, extract, to_cell) in enumerate(_COLUMNS):
    try:
      value = extract(item)
      cell = to_cell(value) if to_cell is not None else value
      worksheet.write(i+1, col, cell)
      print("at", i, col, label, value)
    except Exception:
      # Best effort per field: missing markup surfaces as AttributeError,
      # TypeError, KeyError, IndexError or ValueError, and xlwt may reject an
      # unsupported cell type. Exception (not a bare except) keeps Ctrl-C
      # and SystemExit working.
      print("at", i, col, "no " + label)
  

# Write the finished workbook to disk next to the script's working directory.
output_path = 'zol-phone.xls'
workbook.save(output_path)
